sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.34 R6_2.5.1 fastmap_1.1.1 xfun_0.42
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.7 rmarkdown_2.25
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.8 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.15.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.6.1 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
First, load the necessary packages.
# load libraries - notes show the install command needed to install (pre installed)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(grDevices)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:circlize':
##
## degree
## The following object is masked from 'package:plyr':
##
## join
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
biomin <- read.csv("../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv") %>% rename("Pocillopora_acuta_best_hit" = "gene_id" )
geneInfo <- read.csv("../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv")
biomin_mod <- merge(biomin, geneInfo, by=c("gene_id"), all=F)
head(biomin_mod)
## gene_id accessionnumber.geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 XP_022804785.1
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 P33_g8985
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 JR972076.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Gene:g13552
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 aug_v2a.06327.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 PFX18785.1
## definition
## 1 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 2 Flagellar associated protein
## 3 Acidic skeletal organic matrix protein (Acidic SOMP)
## 4 Acidic SOMP (Full-Length p27)
## 5 SAARP3
## 6 Mucin-4 [Stylophora pistillata]
## Ref X seqnames start
## 1 Peled et al., 2020 224 Pocillopora_acuta_HIv2___Sc0000021 2329764
## 2 Drake et al., 2013 604 Pocillopora_acuta_HIv2___Sc0000013 2026351
## 3 Ramos-Silva et al., 2013 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 4 Mummadisetti et al., 2021 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 5 Takeuchi et al., 2016 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 6 Peled et al., 2020 1240 Pocillopora_acuta_HIv2___Sc0000005 2364120
## end width strand source type score.x phase
## 1 2349234 19471 + AUGUSTUS transcript NA NA
## 2 2032291 5941 - AUGUSTUS transcript NA NA
## 3 7864189 3795 + AUGUSTUS transcript NA NA
## 4 7864189 3795 + AUGUSTUS transcript NA NA
## 5 7864189 3795 + AUGUSTUS transcript NA NA
## 6 2382635 18516 + AUGUSTUS transcript NA NA
## ID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## transcript_id seed_ortholog evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 45351.EDO38228 1.51e-306
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 6087.XP_002163848.1 4.11e-211
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 <NA> NA
## score.y
## 1 851
## 2 607
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## eggNOG_OGs
## 1 COG0695@1|root,COG1249@1|root,KOG1752@2759|Eukaryota,KOG4716@2759|Eukaryota,38BHT@33154|Opisthokonta,3BBN5@33208|Metazoa
## 2 28N88@1|root,2QUTJ@2759|Eukaryota,39ZRG@33154|Opisthokonta,3BMP5@33208|Metazoa
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## max_annot_lvl COG_category Description
## 1 33208|Metazoa C thioredoxin-disulfide reductase activity
## 2 33208|Metazoa - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## Preferred_name
## 1 TXNRD1
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GOs
## 1 GO:0000003;GO:0000302;GO:0000305;GO:0001650;GO:0001704;GO:0001707;GO:0001887;GO:0001890;GO:0003006;GO:0003674;GO:0003824;GO:0004791;GO:0005488;GO:0005515;GO:0005575;GO:0005622;GO:0005623;GO:0005634;GO:0005654;GO:0005730;GO:0005737;GO:0005739;GO:0005783;GO:0005829;GO:0006082;GO:0006139;GO:0006518;GO:0006520;GO:0006575;GO:0006725;GO:0006732;GO:0006733;GO:0006739;GO:0006749;GO:0006753;GO:0006790;GO:0006793;GO:0006796;GO:0006807;GO:0006950;GO:0006979;GO:0007154;GO:0007165;GO:0007275;GO:0007369;GO:0007498;GO:0008150;GO:0008152;GO:0008283;GO:0009056;GO:0009069;GO:0009117;GO:0009611;GO:0009628;GO:0009636;GO:0009653;GO:0009790;GO:0009888;GO:0009987;GO:0010035;GO:0010038;GO:0010269;GO:0010941;GO:0010942;GO:0012505;GO:0015036;GO:0015949;GO:0016043;GO:0016174;GO:0016209;GO:0016259;GO:0016491;GO:0016651;GO:0016667;GO:0016668;GO:0016999;GO:0017001;GO:0017144;GO:0018996;GO:0019216;GO:0019222;GO:0019362;GO:0019637;GO:0019725;GO:0019752;GO:0022404;GO:0022414;GO:0022607;GO:0023052;GO:0031974;GO:0031981;GO:0032501;GO:0032502;GO:0033554;GO:0033797;GO:0034599;GO:0034641;GO:0036295;GO:0036296;GO:0036477;GO:0042221;GO:0042303;GO:0042395;GO:0042493;GO:0042537;GO:0042592;GO:0042737;GO:0042743;GO:0042744;GO:0042802;GO:0042803;GO:0043025;GO:0043167;GO:0043169;GO:0043226;GO:0043227;GO:0043228;GO:0043229;GO:0043231;GO:0043232;GO:0043233;GO:0043436;GO:0043603;GO:0043933;GO:0044085;GO:0044237;GO:0044238;GO:0044248;GO:0044281;GO:0044297;GO:0044422;GO:0044424;GO:0044428;GO:0044444;GO:0044446;GO:0044452;GO:0044464;GO:0045340;GO:0045454;GO:0046483;GO:0046496;GO:0046688;GO:0046872;GO:0046914;GO:0046983;GO:0048332;GO:0048513;GO:0048518;GO:0048522;GO:0048598;GO:0048608;GO:0048646;GO:0048678;GO:0048729;GO:0048731;GO:0048856;GO:0050664;GO:0050789;GO:0050794;GO:0050896;GO:0051186;GO:0051187;GO:0051259;GO:0051262;GO:0051716;GO:0055086;GO:0055093;GO:0055114;GO:0061458;GO:0065003;GO:0065007;GO:0065008;GO:0070013;GO:0070276;GO:0070482;GO:0070887;GO:0070995;GO:0071241;GO:0071248;GO:0071280;GO:0071453;GO:0071455;GO:0071704;GO:0071840;GO:0072524;GO:0072593;GO:0080090;GO:0097237;GO:0097458;GO:0098623;GO:0098625;GO:0098626;GO:0098754;GO:0098869;GO:1901360;GO:1901564;GO:1901605;GO:1901700;GO:1990748
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## EC KEGG_ko KEGG_Pathway
## 1 1.8.1.9 ko:K22182 ko00450,ko05200,ko05225,map00450,map05200,map05225
## 2 - - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC
## 1 - R03596,R09372 RC02518,RC02873 ko00000,ko00001,ko01000 -
## 2 - - - - -
## 3 <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## CAZy BiGG_Reaction PFAMs KEGG_new
## 1 - - Glutaredoxin,Pyr_redox_2,Pyr_redox_dim K22182
## 2 - - - <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> K22017
## substanceBXH geneSymbol
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 Pocillopora_acuta_HIv2___Sc0000021
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 Pocillopora_acuta_HIv2___Sc0000013
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 Pocillopora_acuta_HIv2___Sc0000005
## moduleColor
## 1 brown
## 2 turquoise
## 3 red
## 4 red
## 5 red
## 6 pink
## GO.terms
## 1 GO:0000003,GO:0000302,GO:0000305,GO:0001650,GO:0001704,GO:0001707,GO:0001887,GO:0001890,GO:0003006,GO:0003674,GO:0003824,GO:0004791,GO:0005488,GO:0005515,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0005737,GO:0005739,GO:0005783,GO:0005829,GO:0006082,GO:0006139,GO:0006518,GO:0006520,GO:0006575,GO:0006725,GO:0006732,GO:0006733,GO:0006739,GO:0006749,GO:0006753,GO:0006790,GO:0006793,GO:0006796,GO:0006807,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007275,GO:0007369,GO:0007498,GO:0008150,GO:0008152,GO:0008283,GO:0009056,GO:0009069,GO:0009117,GO:0009611,GO:0009628,GO:0009636,GO:0009653,GO:0009790,GO:0009888,GO:0009987,GO:0010035,GO:0010038,GO:0010269,GO:0010941,GO:0010942,GO:0012505,GO:0015036,GO:0015949,GO:0016043,GO:0016174,GO:0016209,GO:0016259,GO:0016491,GO:0016651,GO:0016667,GO:0016668,GO:0016999,GO:0017001,GO:0017144,GO:0018996,GO:0019216,GO:0019222,GO:0019362,GO:0019637,GO:0019725,GO:0019752,GO:0022404,GO:0022414,GO:0022607,GO:0023052,GO:0031974,GO:0031981,GO:0032501,GO:0032502,GO:0033554,GO:0033797,GO:0034599,GO:0034641,GO:0036295,GO:0036296,GO:0036477,GO:0042221,GO:0042303,GO:0042395,GO:0042493,GO:0042537,GO:0042592,GO:0042737,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0043025,GO:0043167,GO:0043169,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043436,GO:0043603,GO:0043933,GO:0044085,GO:0044237,GO:0044238,GO:0044248,GO:0044281,GO:0044297,GO:0044422,GO:0044424,GO:0044428,GO:0044444,GO:0044446,GO:0044452,GO:0044464,GO:0045340,GO:0045454,GO:0046483,GO:0046496,GO:0046688,GO:0046872,GO:0046914,GO:0046983,GO:0048332,GO:0048513,GO:0048518,GO:0048522,GO:0048598,GO:0048608,GO:0048646,GO:0048678,GO:0048729,GO:0048731,GO:0048856,GO:0050664,GO:0050789,GO:0050794,GO:0050896,GO:0051186,GO:0051187,GO:0051259,GO:0051262,GO:0051716,GO:0055086,GO:0055093,GO:0055114,GO:0061458,GO:0065003,GO:0065007,GO:0065008,GO:0070013,GO:0070276,GO:0070482,GO:0070887,GO:0070995,GO:0071241,GO:0071248,GO:0071280,GO:0071453,GO:0071455,GO:0071704,GO:0071840,GO:0072524,GO:0072593,GO:0080090,GO:0097237,GO:0097458,GO:0098623,GO:0098625,GO:0098626,GO:0098754,GO:0098869,GO:1901360,GO:1901564,GO:1901605,GO:1901700,GO:1990748
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GO.description GS.Flat GS.Slope p.GS.Flat
## 1 thioredoxin-disulfide reductase activity 0.57178848 -0.57178848 3.311055e-05
## 2 - -0.29586493 0.29586493 4.589336e-02
## 3 <NA> 0.35628512 -0.35628512 1.508700e-02
## 4 <NA> 0.35628512 -0.35628512 1.508700e-02
## 5 <NA> 0.35628512 -0.35628512 1.508700e-02
## 6 <NA> -0.05455251 0.05455251 7.187880e-01
## p.GS.Slope A.brown p.A.brown A.magenta p.A.magenta A.red
## 1 3.311055e-05 0.7005073 5.973619e-08 -0.3738439 1.048844e-02 0.2901298
## 2 4.589336e-02 -0.4291375 2.921081e-03 0.3115539 3.505853e-02 -0.3452015
## 3 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 4 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 5 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 6 7.187880e-01 0.0972208 5.203783e-01 -0.3252127 2.743096e-02 0.3709019
## p.A.red A.turquoise p.A.turquoise A.purple p.A.purple A.green
## 1 5.047695e-02 -0.43323233 2.634293e-03 0.6984202 6.792759e-08 0.4574538
## 2 1.879503e-02 0.58815287 1.720729e-05 -0.1784560 2.353887e-01 -0.1306835
## 3 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 4 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 5 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 6 1.116224e-02 0.08164806 5.895928e-01 -0.1391597 3.563456e-01 0.1614616
## p.A.green A.lightcyan p.A.lightcyan A.pink p.A.pink A.blue
## 1 0.001391986 -0.3508191 1.682948e-02 0.1707384 2.565893e-01 0.12358439
## 2 0.386672688 0.1196505 4.283449e-01 -0.1522331 3.125037e-01 -0.58598406
## 3 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 4 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 5 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 6 0.283719167 -0.5276145 1.646022e-04 0.6417477 1.537006e-06 -0.02286640
## p.A.blue A.salmon p.A.salmon A.midnightblue p.A.midnightblue
## 1 4.132051e-01 0.1178467 0.435389343 0.2439890 0.10224333
## 2 1.880492e-05 0.1907995 0.204028320 0.2258109 0.13131383
## 3 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 4 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 5 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 6 8.801027e-01 0.2940377 0.047315397 0.2906592 0.05003895
## A.black p.A.black A.cyan p.A.cyan A.yellow p.A.yellow
## 1 -0.28430645 0.05550307 0.04904562 0.7461773 0.05522073 0.7154873547
## 2 -0.18825739 0.21023361 0.07386502 0.6256510 -0.14392338 0.3399558326
## 3 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 4 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 5 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 6 0.02103556 0.88964060 -0.13338389 0.3768501 -0.46998526 0.0009821983
## A.tan p.A.tan Length
## 1 0.2648346 0.075293267 19471
## 2 0.2613466 0.079363532 5941
## 3 -0.2055446 0.170565420 3795
## 4 -0.2055446 0.170565420 3795
## 5 -0.2055446 0.170565420 3795
## 6 -0.3805358 0.009084622 18516
length(unique(biomin_mod$gene_id))
## [1] 65
plyr::count(biomin_mod, "moduleColor")
## moduleColor freq
## 1 black 3
## 2 blue 36
## 3 brown 17
## 4 cyan 2
## 5 green 3
## 6 magenta 1
## 7 pink 7
## 8 red 18
## 9 salmon 6
## 10 tan 3
## 11 turquoise 21
## 12 yellow 10
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector_biomin <- biomin_mod %>%
pull(gene_id) %>% unique()
##Get a list of GO Terms for each module
GO.terms_biomin <- biomin_mod %>%
dplyr::select(GOs,gene_id) %>% unique() %>% rename(GOs = "GO.terms")
dim(GO.terms_biomin)
## [1] 65 2
dim(GO.terms_biomin %>% filter(is.na(GO.terms)))
## [1] 43 2
dim(GO.terms_biomin %>% filter(!is.na(GO.terms)))
## [1] 22 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_biomin$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms_biomin$gene_id, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene_id", "GO.terms")
GO.terms_biomin <- split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector = as.integer(ALL.vector %in% ID.vector_biomin)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector_biomin, gene2cat=GO.terms_biomin, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
head(GO)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 102 GO:0003674 0 1 20
## 108 GO:0003824 0 1 10
## 126 GO:0005488 0 1 14
## 130 GO:0005515 0 1 12
## 133 GO:0005575 0 1 21
## 136 GO:0005622 0 1 15
## numInCat term ontology
## 102 20 molecular_function MF
## 108 10 catalytic activity MF
## 126 14 binding MF
## 130 12 protein binding MF
## 133 21 cellular_component CC
## 136 15 intracellular anatomical structure CC
#adjust p-values
GO$bh_adjust <- p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(bh_adjust<0.05) %>%
dplyr::arrange(., ontology, bh_adjust)
#Filtering for p < 0.00001
GO_00001 <- GO %>%
dplyr::filter(bh_adjust<0.00001) %>%
dplyr::arrange(., ontology, bh_adjust)
#Write file of results
write.csv(GO_00001, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
go_results <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
head(go_results)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807 0 1 11
## 2 2 GO:0006810 0 1 7
## 3 3 GO:0006811 0 1 7
## 4 4 GO:0006873 0 1 6
## 5 5 GO:0006996 0 1 6
## 6 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
go_results_BP <- go_results %>%
filter(ontology=="BP") %>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP)
## [1] 186 9
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807 0 1 11
## 2 2 GO:0006810 0 1 7
## 3 3 GO:0006811 0 1 7
## 4 4 GO:0006873 0 1 6
## 5 5 GO:0006996 0 1 6
## 6 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/biomin_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
dev.off()
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
##
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$bh_adjust), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 180 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 32
The reduced list of terms is 180 terms that falls under 32 parent terms.
#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
#go_results_BP_reduced <- go_results_BP_reduced %>% filter(grepl(paste(keywords, collapse="|"), ParentTerm))
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_biomin <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=bh_adjust)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_biomin
GO_00001_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
GO_00001_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
GO_00001_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_00001_up %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_up)
## [1] 1976 8
go_results_BP_down <- GO_00001_down %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_down)
## [1] 1824 8
go_results_BP_biomin <- GO_00001_biomin %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_biomin)
## [1] 186 8
all_enriched_terms_up_down_biomin <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm,go_results_BP_biomin$GOterm)
length(all_enriched_terms_up_down_biomin)
## [1] 3986
length(unique(all_enriched_terms_up_down_biomin))
## [1] 2149
By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules or the biomineralization genes.
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down_biomin),
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
reducedTerms <- reduceSimMatrix(simMatrix,
scores = "size",
threshold=0.7,
orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 1817 10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 133
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 136
#keep only the goterms from the reduced list
go_results_BP_biomin_reduced <- go_results_BP_biomin %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_biomin_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_biomin_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_biomin_reduced$ParentTerm))
## [1] 55
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#write.csv(go_results_BP_biomin_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>%
mutate(Factor = "Up")
cal_up_terms_parent_nterm <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Up") %>% ungroup()
head(cal_up_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 1215
## 2 GO:0007275 0 1 978
## 3 GO:0008152 0 1 1387
## 4 GO:0009987 0 1 1938
## 5 GO:0016043 0 1 1049
## 6 GO:0019222 0 1 982
## numInCat term ontology bh_adjust
## 1 1215 nitrogen compound metabolic process BP 0
## 2 978 multicellular organism development BP 0
## 3 1387 metabolic process BP 0
## 4 1938 cellular process BP 0
## 5 1049 cellular component organization BP 0
## 6 982 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Up
## 2 cell differentiation Up
## 3 metabolic process Up
## 4 cellular process Up
## 5 mitochondrion organization Up
## 6 regulation of DNA-templated transcription Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
cal_down_terms_parent_nterm <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Down") %>% ungroup()
head(cal_down_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 910
## 2 GO:0007275 0 1 610
## 3 GO:0008152 0 1 998
## 4 GO:0009987 0 1 1277
## 5 GO:0016043 0 1 652
## 6 GO:0019222 0 1 599
## numInCat term ontology bh_adjust
## 1 910 nitrogen compound metabolic process BP 0
## 2 610 multicellular organism development BP 0
## 3 998 metabolic process BP 0
## 4 1277 cellular process BP 0
## 5 652 cellular component organization BP 0
## 6 599 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Down
## 2 cell differentiation Down
## 3 metabolic process Down
## 4 cellular process Down
## 5 mitochondrion organization Down
## 6 regulation of DNA-templated transcription Down
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.
#cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- go_results_BP_biomin_reduced
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin")
head(cal_biomin_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 11
## 2 GO:0006810 0 1 7
## 3 GO:0006811 0 1 7
## 4 GO:0006873 0 1 6
## 5 GO:0006996 0 1 6
## 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
## ParentTerm Factor
## 1 metabolic process Biomin
## 2 transmembrane transport Biomin
## 3 monoatomic ion transport Biomin
## 4 intracellular calcium ion homeostasis Biomin
## 5 mitochondrion organization Biomin
## 6 intracellular receptor signaling pathway Biomin
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.0001) within the modules that were upregulated or downregulated for calcification or in the biomineralization gene list. The list has been further reduced by using the package rrvgo.
all_terms <- rbind(cal_down_terms,cal_up_terms,cal_biomin_terms)
all_terms <- all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm")]
all_terms$GOterm<-as.factor(all_terms$GOterm)
dim(all_terms) #3453 reduced terms
## [1] 3453 10
length(unique(all_terms$GOterm)) #this represents 1817 unique terms between the three lists of reduced terms
## [1] 1817
length(unique(all_terms$ParentTerm)) #this represents 136 unique terms between the three lists of reduced terms
## [1] 136
This is collapsing the list from 3273 rows to only 1809, representing the 1809 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", "),
term = paste(unique(term), collapse = ", ")
)
dim(goterms_shared)
## [1] 1817 4
goterms_shared_full <- goterms_shared %>%
left_join(dplyr::select(all_terms,GOterm, Factor, bh_adjust), by = "GOterm") %>% #select 3 columns GOterm, Factor, bh_adjust from all_terms and left join by GOterm, turns this dataframe from 1817 rows back to the 3453 in all_terms
mutate(bh_adjust_Up = ifelse(Factor.y == "Up", bh_adjust, NA)) %>% #add a column that is the p-value for the Up factor
mutate(bh_adjust_Down = ifelse(Factor.y == "Down", bh_adjust, NA)) %>% #add a column that is the p-value for the Down factor
mutate(bh_adjust_Biomin = ifelse(Factor.y == "Biomin", bh_adjust, NA)) %>% #add a column that is the p-value for the Biomin factor
dplyr::select(-c("bh_adjust", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
dplyr::summarize(bh_adjust_Up = na.omit(bh_adjust_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
bh_adjust_Down = na.omit(bh_adjust_Down)[1], #carry over the p-value for the term in the down direction, by taking the first non-NA value.
bh_adjust_Biomin = na.omit(bh_adjust_Biomin)[1]) %>% #carry over the p-value for the term in the biomin list, by taking the first non-NA value.
rename(Factor.x = "Factor") #rename column
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")
goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()
goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Biomin")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
goterms_shared_only_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up, Biomin" | Factor=="Up, Biomin" | Factor=="Down, Biomin")
nrow(goterms_up)
## [1] 217
nrow(goterms_down)
## [1] 127
nrow(goterms_biomin)
## [1] 8
nrow(goterms_shared_only)
## [1] 1293
nrow(goterms_shared_only_biomin)
## [1] 172
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_biomin) + nrow(goterms_shared_only) + nrow(goterms_shared_only_biomin) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 75
length(unique(goterms_down$ParentTerm))
## [1] 56
length(unique(goterms_biomin$ParentTerm))
## [1] 4
length(unique(goterms_shared_only$ParentTerm))
## [1] 129
length(unique(goterms_shared_only_biomin$ParentTerm))
## [1] 55
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
parent_filtered_up <- result_parent_unique %>%
dplyr::filter(Factor == "Up" | Factor=="Up, Biomin")
#dplyr::filter(SharedGOterms>=5)
parent_filtered_down <- result_parent_unique %>%
dplyr::filter(Factor=="Down" | Factor=="Down, Biomin")
#dplyr::filter(SharedGOterms>=5)
merged_up <- parent_filtered_up %>%
dplyr::group_by(ParentTerm) %>%
left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
rename(SharedGOterms = "UniqueGOTerms") %>%
mutate(
Has_Biomin = any(Factor == "Up, Biomin"),
Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
Factor %in% c("Up, Biomin"),
UniqueGOTerms / Sum_of_UniqueGOterms,
0
)
) %>%
mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up"))
merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 75 × 9
## # Groups: ParentTerm [75]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 lipid metabolic … Up 12 28 Up
## 2 cell cycle Up 10 50 Up
## 3 intracellular si… Up 8 34 Up
## 4 nuclear migration Up 8 16 Up
## 5 mitochondrion or… Up 7 31 Up
## 6 transmembrane re… Up 7 22 Up
## 7 nervous system d… Up 6 31 Up
## 8 nucleoside metab… Up 6 22 Up
## 9 protein import i… Up 6 31 Up
## 10 synapse organiza… Up 6 15 Up
## # ℹ 65 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
cal_freq_terms_filtered_up <- merged_up_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 75 × 9
## # Groups: ParentTerm [75]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 lipid metabolic … Up 12 28 Up
## 2 cell cycle Up 10 50 Up
## 3 intracellular si… Up 8 34 Up
## 4 nuclear migration Up 8 16 Up
## 5 mitochondrion or… Up 7 31 Up
## 6 transmembrane re… Up 7 22 Up
## 7 nervous system d… Up 6 31 Up
## 8 nucleoside metab… Up 6 22 Up
## 9 protein import i… Up 6 31 Up
## 10 synapse organiza… Up 6 15 Up
## # ℹ 65 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique.pdf", plot=freq_fig_up, dpi=300, height=20, width=10, units="in", limitsize=FALSE)
cal_freq_terms_filtered_down <- merged_down_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 56 × 9
## # Groups: ParentTerm [56]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 fatty acid metab… Down 16 56 Down
## 2 intracellular si… Down 6 32 Down
## 3 protein ubiquiti… Down 5 17 Down
## 4 mitochondrion or… Down 4 28 Down
## 5 regulation of DN… Down 4 55 Down
## 6 regulation of tr… Down 4 24 Down
## 7 ribosome biogene… Down 4 19 Down
## 8 sensory percepti… Down 4 19 Down
## 9 IRE1-mediated un… Down 3 55 Down
## 10 carbohydrate met… Down 3 12 Down
## # ℹ 46 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique.pdf", plot=freq_fig_down, dpi=300, height=20, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs